In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
from matplotlib import pyplot as plt
import seaborn as sns
pd.options.display.float_format ='{:,.2f}'.format
pd.options.display.max_rows = None
pd.options.display.max_columns = None
In [2]:
def outlire(data):
min = data.min()
q1= data.quantile(.25)
q2= data.median()
q3= data.quantile(.75)
iqr= q3-q1
lb=q1-(1.5*iqr)
ub=q3+(1.5*iqr)
print("lb",lb)
print("ub",ub)
In [3]:
df=pd.read_csv("madrid_cleaned_data.csv")
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 21177 entries, 0 to 21176 Data columns (total 8 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Unnamed: 0 21177 non-null int64 1 id 21177 non-null int64 2 district 21177 non-null object 3 sq_mt_built 21177 non-null float64 4 n_rooms 21177 non-null int64 5 n_bathrooms 21177 non-null int64 6 buy_price 21177 non-null int64 7 buy_price_by_area 21177 non-null int64 dtypes: float64(1), int64(6), object(1) memory usage: 1.3+ MB
In [4]:
df.head()
Out[4]:
| Unnamed: 0 | id | district | sq_mt_built | n_rooms | n_bathrooms | buy_price | buy_price_by_area | |
|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 21742 | San Cristóbal | 64.00 | 2 | 1 | 85000 | 1328 |
| 1 | 1 | 21741 | Los Ángeles | 70.00 | 3 | 1 | 129900 | 1856 |
| 2 | 2 | 21740 | San Andrés | 94.00 | 2 | 2 | 144247 | 1535 |
| 3 | 3 | 21739 | San Andrés | 64.00 | 2 | 1 | 109900 | 1717 |
| 4 | 4 | 21738 | Los Rosales | 108.00 | 2 | 2 | 260000 | 2407 |
In [5]:
df.describe()
Out[5]:
| Unnamed: 0 | id | sq_mt_built | n_rooms | n_bathrooms | buy_price | buy_price_by_area | |
|---|---|---|---|---|---|---|---|
| count | 21,177.00 | 21,177.00 | 21,177.00 | 21,177.00 | 21,177.00 | 21,177.00 | 21,177.00 |
| mean | 10,885.42 | 10,856.58 | 148.65 | 3.04 | 2.08 | 644,503.66 | 4,022.38 |
| std | 6,292.32 | 6,292.32 | 134.65 | 1.42 | 1.34 | 747,510.50 | 1,910.83 |
| min | 0.00 | 1.00 | 16.00 | 1.00 | 1.00 | 36,000.00 | 536.00 |
| 25% | 5,419.00 | 5,413.00 | 71.00 | 2.00 | 1.00 | 199,000.00 | 2,550.00 |
| 50% | 10,919.00 | 10,823.00 | 101.00 | 3.00 | 2.00 | 379,000.00 | 3,723.00 |
| 75% | 16,329.00 | 16,323.00 | 165.00 | 4.00 | 2.00 | 765,000.00 | 5,000.00 |
| max | 21,741.00 | 21,742.00 | 999.00 | 24.00 | 14.00 | 8,800,000.00 | 18,889.00 |
In [6]:
# numerical columns
plt.figure(figsize=(20,8))
for e , i in enumerate(df.columns[3:]): #e= index , i =numercal columns
plt.subplot(2,3 ,e+1)
sns.histplot(df[i] ,kde=True)
plt.title("distribution of " + i)
plt.tight_layout();
C:\Users\dell\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
C:\Users\dell\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
C:\Users\dell\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
C:\Users\dell\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
C:\Users\dell\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
with pd.option_context('mode.use_inf_as_na', True):
In [7]:
plt.figure(figsize=(20,8))
for e , i in enumerate(df.columns[3:]): #e= index , i =numercal columns
plt.subplot(2,3 ,e+1)
sns.boxplot(x=df[i] )
plt.title("distribution of " + i)
plt.tight_layout();
In [8]:
print("sq_mt")
outlire(data = df.sq_mt_built)
print("n_rooms")
outlire(data = df.n_rooms)
print("n_bathrooms")
outlire(data = df.n_bathrooms)
print("puy_price")
outlire(data = df.buy_price)
print("buy_price_by_area")
outlire(data = df.buy_price_by_area)
sq_mt lb -70.0 ub 306.0 n_rooms lb -1.0 ub 7.0 n_bathrooms lb -0.5 ub 3.5 puy_price lb -650000.0 ub 1614000.0 buy_price_by_area lb -1125.0 ub 8675.0
In [ ]:
In [9]:
px.bar(df.district.value_counts() , title="Distribution of district")
correlation coeffcient¶
In [11]:
corr=df[["sq_mt_built","n_rooms","n_bathrooms"]]
sns.pairplot(corr ,kind="reg")
plt.tight_layout();
C:\Users\dell\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead. C:\Users\dell\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead. C:\Users\dell\anaconda3\Lib\site-packages\seaborn\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.
In [12]:
sns.heatmap(corr.corr() ,annot=True)
plt.title("corrleation cofficient between numerical columns")
Out[12]:
Text(0.5, 1.0, 'corrleation cofficient between numerical columns')
In [13]:
corr=df[["buy_price","sq_mt_built","n_rooms","n_bathrooms"]]
sns.heatmap(corr.corr() ,annot=True)
plt.title("corrleation cofficient between numerical columns")
Out[13]:
Text(0.5, 1.0, 'corrleation cofficient between numerical columns')
In [14]:
sns.regplot(data=df ,x="sq_mt_built",y="buy_price")
plt.title("correlation between area (m2) and buy price")
plt.xlabel("area (m2)")
plt.ylabel("buy price");
In [15]:
df.head()
Out[15]:
| Unnamed: 0 | id | district | sq_mt_built | n_rooms | n_bathrooms | buy_price | buy_price_by_area | |
|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 21742 | San Cristóbal | 64.00 | 2 | 1 | 85000 | 1328 |
| 1 | 1 | 21741 | Los Ángeles | 70.00 | 3 | 1 | 129900 | 1856 |
| 2 | 2 | 21740 | San Andrés | 94.00 | 2 | 2 | 144247 | 1535 |
| 3 | 3 | 21739 | San Andrés | 64.00 | 2 | 1 | 109900 | 1717 |
| 4 | 4 | 21738 | Los Rosales | 108.00 | 2 | 2 | 260000 | 2407 |
location or size:what influences house prices in madride¶
do housing prices vary by district if so ,which are the most expensive for purchasin a home ?
In [17]:
mean_price =df.groupby("district")["buy_price"].mean().round(2).sort_values(ascending=False)
mean_price.head(10).plot(kind="bar" ,xlabel ="District" ,ylabel="price",title="mean of puy_price")
Out[17]:
<Axes: title={'center': 'mean of puy_price'}, xlabel='District', ylabel='price'>
In [18]:
mean_price =df.groupby("district")["buy_price_by_area"].mean().round(2).sort_values(ascending=False)
mean_price.head(10).plot(kind="bar" ,xlabel ="District" ,ylabel="price",title="mean of buy_price_by_area")
Out[18]:
<Axes: title={'center': 'mean of buy_price_by_area'}, xlabel='District', ylabel='price'>
In [19]:
reco = df.query('district == "Recoletos"')
In [20]:
reco.sample(10)
Out[20]:
| Unnamed: 0 | id | district | sq_mt_built | n_rooms | n_bathrooms | buy_price | buy_price_by_area | |
|---|---|---|---|---|---|---|---|---|
| 18698 | 19224 | 2518 | Recoletos | 84.00 | 2 | 2 | 730000 | 8690 |
| 19539 | 20074 | 1668 | Recoletos | 247.00 | 4 | 2 | 1700000 | 6883 |
| 18747 | 19274 | 2468 | Recoletos | 219.00 | 5 | 2 | 1970000 | 8995 |
| 18774 | 19301 | 2441 | Recoletos | 176.00 | 3 | 3 | 1460000 | 8295 |
| 19221 | 19752 | 1990 | Recoletos | 232.00 | 3 | 2 | 1950000 | 8405 |
| 19408 | 19943 | 1799 | Recoletos | 221.00 | 4 | 4 | 1700000 | 7692 |
| 19192 | 19723 | 2019 | Recoletos | 241.00 | 4 | 4 | 2500000 | 10373 |
| 19068 | 19598 | 2144 | Recoletos | 220.00 | 4 | 6 | 2400000 | 10909 |
| 19160 | 19691 | 2051 | Recoletos | 247.00 | 2 | 3 | 2200000 | 8907 |
| 18917 | 19445 | 2297 | Recoletos | 404.00 | 5 | 6 | 4350000 | 10767 |
In [21]:
import statsmodels.api as sm
reco['intercept'] = 1
lm = sm.OLS(reco["buy_price"] , reco[["intercept",'sq_mt_built']])
result = lm.fit()
result.summary()
C:\Users\dell\AppData\Local\Temp\ipykernel_17720\337612646.py:2: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
Out[21]:
| Dep. Variable: | buy_price | R-squared: | 0.833 |
|---|---|---|---|
| Model: | OLS | Adj. R-squared: | 0.832 |
| Method: | Least Squares | F-statistic: | 555.1 |
| Date: | Thu, 09 Jan 2025 | Prob (F-statistic): | 5.30e-45 |
| Time: | 17:29:55 | Log-Likelihood: | -1654.1 |
| No. Observations: | 113 | AIC: | 3312. |
| Df Residuals: | 111 | BIC: | 3318. |
| Df Model: | 1 | ||
| Covariance Type: | nonrobust |
| coef | std err | t | P>|t| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| intercept | -4.342e+04 | 1.07e+05 | -0.406 | 0.686 | -2.55e+05 | 1.69e+05 |
| sq_mt_built | 8975.6204 | 380.945 | 23.561 | 0.000 | 8220.753 | 9730.488 |
| Omnibus: | 11.498 | Durbin-Watson: | 2.039 |
|---|---|---|---|
| Prob(Omnibus): | 0.003 | Jarque-Bera (JB): | 16.048 |
| Skew: | 0.512 | Prob(JB): | 0.000328 |
| Kurtosis: | 4.536 | Cond. No. | 574. |
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
In [22]:
# المعاملات من الجدول
b0 = -43420 # الثابت
b1 = 8975.62 # معامل sq_mt_built
# قيمة x
x = 300
# حساب السعر
price = b0 + b1 * x
print("السعر المتوقع:", price)
print("predicted price of flat in recoletos with are of {} m2 = {} euro".format(x,price))
السعر المتوقع: 2649266.0000000005 predicted price of flat in recoletos with are of 300 m2 = 2649266.0000000005 euro
MULTILINER Regression model¶
- with quantitive data
In [24]:
reco.head()
Out[24]:
| Unnamed: 0 | id | district | sq_mt_built | n_rooms | n_bathrooms | buy_price | buy_price_by_area | intercept | |
|---|---|---|---|---|---|---|---|---|---|
| 18576 | 19102 | 2640 | Recoletos | 114.00 | 4 | 1 | 750000 | 6579 | 1 |
| 18580 | 19106 | 2636 | Recoletos | 250.00 | 3 | 4 | 2990000 | 11960 | 1 |
| 18584 | 19110 | 2632 | Recoletos | 187.00 | 3 | 3 | 1450000 | 7754 | 1 |
| 18591 | 19117 | 2625 | Recoletos | 417.00 | 7 | 6 | 2500000 | 5995 | 1 |
| 18594 | 19120 | 2622 | Recoletos | 127.00 | 2 | 2 | 990000 | 7795 | 1 |
In [25]:
corr = reco[["buy_price","sq_mt_built","n_rooms","n_bathrooms"]]
sns.heatmap(corr.corr(),annot=True)
plt.title("corrleation confficient numerical colums");
In [26]:
lm = sm.OLS(reco["buy_price"] , reco[["intercept",'sq_mt_built' ,"n_rooms" , "n_bathrooms"]])
result = lm.fit()
result.summary()
Out[26]:
| Dep. Variable: | buy_price | R-squared: | 0.871 |
|---|---|---|---|
| Model: | OLS | Adj. R-squared: | 0.867 |
| Method: | Least Squares | F-statistic: | 245.2 |
| Date: | Thu, 09 Jan 2025 | Prob (F-statistic): | 2.69e-48 |
| Time: | 17:29:56 | Log-Likelihood: | -1639.7 |
| No. Observations: | 113 | AIC: | 3287. |
| Df Residuals: | 109 | BIC: | 3298. |
| Df Model: | 3 | ||
| Covariance Type: | nonrobust |
| coef | std err | t | P>|t| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| intercept | 1.068e+05 | 1.29e+05 | 0.828 | 0.409 | -1.49e+05 | 3.62e+05 |
| sq_mt_built | 9686.1051 | 622.560 | 15.559 | 0.000 | 8452.212 | 1.09e+04 |
| n_rooms | -2.292e+05 | 5.01e+04 | -4.573 | 0.000 | -3.29e+05 | -1.3e+05 |
| n_bathrooms | 1.679e+05 | 4.41e+04 | 3.804 | 0.000 | 8.04e+04 | 2.55e+05 |
| Omnibus: | 13.677 | Durbin-Watson: | 2.217 |
|---|---|---|---|
| Prob(Omnibus): | 0.001 | Jarque-Bera (JB): | 23.442 |
| Skew: | 0.517 | Prob(JB): | 8.12e-06 |
| Kurtosis: | 4.977 | Cond. No. | 804. |
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
الجدول الذي بالاعلي لا يحتوي علي نتائج دقيقه وهذا بسبب عدم دقه n_room¶
In [28]:
lm = sm.OLS(reco["buy_price"] , reco[["intercept",'sq_mt_built' , "n_bathrooms"]])
result = lm.fit()
result.summary()
Out[28]:
| Dep. Variable: | buy_price | R-squared: | 0.846 |
|---|---|---|---|
| Model: | OLS | Adj. R-squared: | 0.843 |
| Method: | Least Squares | F-statistic: | 302.6 |
| Date: | Thu, 09 Jan 2025 | Prob (F-statistic): | 1.92e-45 |
| Time: | 17:29:56 | Log-Likelihood: | -1649.6 |
| No. Observations: | 113 | AIC: | 3305. |
| Df Residuals: | 110 | BIC: | 3313. |
| Df Model: | 2 | ||
| Covariance Type: | nonrobust |
| coef | std err | t | P>|t| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| intercept | -2.139e+05 | 1.18e+05 | -1.819 | 0.072 | -4.47e+05 | 1.91e+04 |
| sq_mt_built | 7863.2947 | 519.692 | 15.131 | 0.000 | 6833.387 | 8893.202 |
| n_bathrooms | 1.442e+05 | 4.76e+04 | 3.028 | 0.003 | 4.98e+04 | 2.39e+05 |
| Omnibus: | 23.707 | Durbin-Watson: | 2.165 |
|---|---|---|---|
| Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 66.942 |
| Skew: | 0.698 | Prob(JB): | 2.91e-15 |
| Kurtosis: | 6.503 | Cond. No. | 668. |
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
In [29]:
area = 300
bath = 3
price = 121200 + 3895.9 * area + 196000 *bath
print("pradicted price of falt in Recoletos with area of {} m2 and {} bathrooms = {} Euro".format(area,bath,price))
pradicted price of falt in Recoletos with area of 300 m2 and 3 bathrooms = 1877970.0 Euro
In [30]:
lm = sm.OLS(reco.buy_price,reco[["intercept","n_rooms","n_bathrooms"]])
result =lm.fit()
result.summary()
Out[30]:
| Dep. Variable: | buy_price | R-squared: | 0.584 |
|---|---|---|---|
| Model: | OLS | Adj. R-squared: | 0.577 |
| Method: | Least Squares | F-statistic: | 77.32 |
| Date: | Thu, 09 Jan 2025 | Prob (F-statistic): | 1.07e-21 |
| Time: | 17:29:56 | Log-Likelihood: | -1705.8 |
| No. Observations: | 113 | AIC: | 3418. |
| Df Residuals: | 110 | BIC: | 3426. |
| Df Model: | 2 | ||
| Covariance Type: | nonrobust |
| coef | std err | t | P>|t| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| intercept | -3.288e+05 | 2.25e+05 | -1.462 | 0.147 | -7.74e+05 | 1.17e+05 |
| n_rooms | 2.701e+05 | 6.88e+04 | 3.927 | 0.000 | 1.34e+05 | 4.06e+05 |
| n_bathrooms | 4.866e+05 | 6.98e+04 | 6.968 | 0.000 | 3.48e+05 | 6.25e+05 |
| Omnibus: | 67.811 | Durbin-Watson: | 2.128 |
|---|---|---|---|
| Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 364.137 |
| Skew: | 1.987 | Prob(JB): | 8.49e-80 |
| Kurtosis: | 10.845 | Cond. No. | 14.4 |
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
In [31]:
room =300
bath =3
price = 208900 + 76030 * room +294200 *bath
print("pradicted price of falt in Recoletos with area of {} m2 and {} bathrooms = {} Euro".format(area,bath,price))
pradicted price of falt in Recoletos with area of 300 m2 and 3 bathrooms = 23900500 Euro
--DUMMY VARIABLES¶
In [33]:
df.head()
Out[33]:
| Unnamed: 0 | id | district | sq_mt_built | n_rooms | n_bathrooms | buy_price | buy_price_by_area | |
|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 21742 | San Cristóbal | 64.00 | 2 | 1 | 85000 | 1328 |
| 1 | 1 | 21741 | Los Ángeles | 70.00 | 3 | 1 | 129900 | 1856 |
| 2 | 2 | 21740 | San Andrés | 94.00 | 2 | 2 | 144247 | 1535 |
| 3 | 3 | 21739 | San Andrés | 64.00 | 2 | 1 | 109900 | 1717 |
| 4 | 4 | 21738 | Los Rosales | 108.00 | 2 | 2 | 260000 | 2407 |
In [34]:
df.district.nunique()
Out[34]:
146
In [35]:
df_dummy = df.query(' district in ("Recoletos","Castellana","Trafalgar") ')
df_dummy.head()
Out[35]:
| Unnamed: 0 | id | district | sq_mt_built | n_rooms | n_bathrooms | buy_price | buy_price_by_area | |
|---|---|---|---|---|---|---|---|---|
| 10604 | 10937 | 10805 | Trafalgar | 68.00 | 2 | 1 | 510000 | 7500 |
| 10667 | 11000 | 10742 | Trafalgar | 103.00 | 2 | 2 | 980000 | 9515 |
| 10668 | 11001 | 10741 | Trafalgar | 103.00 | 2 | 2 | 960000 | 9320 |
| 10676 | 11010 | 10732 | Trafalgar | 95.00 | 4 | 1 | 239000 | 2516 |
| 10708 | 11042 | 10700 | Trafalgar | 67.00 | 2 | 2 | 550000 | 8209 |
In [36]:
pd.get_dummies(df_dummy.district).head()
Out[36]:
| Castellana | Recoletos | Trafalgar | |
|---|---|---|---|
| 10604 | False | False | True |
| 10667 | False | False | True |
| 10668 | False | False | True |
| 10676 | False | False | True |
| 10708 | False | False | True |
In [37]:
df_dummy[["Cast","Reco","Traf"]]=pd.get_dummies(df_dummy["district"], dtype="int")
df_dummy.head()
C:\Users\dell\AppData\Local\Temp\ipykernel_17720\3200272759.py:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy C:\Users\dell\AppData\Local\Temp\ipykernel_17720\3200272759.py:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy C:\Users\dell\AppData\Local\Temp\ipykernel_17720\3200272759.py:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
Out[37]:
| Unnamed: 0 | id | district | sq_mt_built | n_rooms | n_bathrooms | buy_price | buy_price_by_area | Cast | Reco | Traf | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 10604 | 10937 | 10805 | Trafalgar | 68.00 | 2 | 1 | 510000 | 7500 | 0 | 0 | 1 |
| 10667 | 11000 | 10742 | Trafalgar | 103.00 | 2 | 2 | 980000 | 9515 | 0 | 0 | 1 |
| 10668 | 11001 | 10741 | Trafalgar | 103.00 | 2 | 2 | 960000 | 9320 | 0 | 0 | 1 |
| 10676 | 11010 | 10732 | Trafalgar | 95.00 | 4 | 1 | 239000 | 2516 | 0 | 0 | 1 |
| 10708 | 11042 | 10700 | Trafalgar | 67.00 | 2 | 2 | 550000 | 8209 | 0 | 0 | 1 |
In [38]:
df_dummy ["intercept"] =1
lm = sm.OLS(df_dummy.buy_price, df_dummy[["intercept","Cast","Traf"]])
result =lm.fit()
result.summary()
C:\Users\dell\AppData\Local\Temp\ipykernel_17720\3764759753.py:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
Out[38]:
| Dep. Variable: | buy_price | R-squared: | 0.363 |
|---|---|---|---|
| Model: | OLS | Adj. R-squared: | 0.360 |
| Method: | Least Squares | F-statistic: | 114.3 |
| Date: | Thu, 09 Jan 2025 | Prob (F-statistic): | 5.20e-40 |
| Time: | 17:29:56 | Log-Likelihood: | -6095.1 |
| No. Observations: | 404 | AIC: | 1.220e+04 |
| Df Residuals: | 401 | BIC: | 1.221e+04 |
| Df Model: | 2 | ||
| Covariance Type: | nonrobust |
| coef | std err | t | P>|t| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| intercept | 2.155e+06 | 8.15e+04 | 26.448 | 0.000 | 1.99e+06 | 2.32e+06 |
| Cast | -7.785e+05 | 1.27e+05 | -6.129 | 0.000 | -1.03e+06 | -5.29e+05 |
| Traf | -1.513e+06 | 1.01e+05 | -14.998 | 0.000 | -1.71e+06 | -1.31e+06 |
| Omnibus: | 158.066 | Durbin-Watson: | 2.012 |
|---|---|---|---|
| Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 761.523 |
| Skew: | 1.636 | Prob(JB): | 4.34e-166 |
| Kurtosis: | 8.876 | Cond. No. | 4.29 |
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
In [ ]: